[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

kuilpd · 2025-08-06T13:51:21Z

This patch introduces ScalarLiteralNode without any uses by other nodes yet. It also includes lexing and parsing for integer and floating point numbers, and makes a function getAutoSenseRadix in StringRef global.

llvmbot · 2025-08-06T13:51:44Z

@llvm/pr-subscribers-llvm-support
@llvm/pr-subscribers-llvm-adt

@llvm/pr-subscribers-lldb

Author: Ilia Kuklin (kuilpd)

Changes

This patch introduces ScalarLiteralNode without any uses by other nodes yet. It also includes lexing and parsing for integer and floating point numbers, and makes a function getAutoSenseRadix in StringRef global.

Patch is 25.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152308.diff

17 Files Affected:

(modified) lldb/docs/dil-expr-lang.ebnf (+3)
(modified) lldb/include/lldb/ValueObject/DILAST.h (+37)
(modified) lldb/include/lldb/ValueObject/DILEval.h (+7)
(modified) lldb/include/lldb/ValueObject/DILLexer.h (+2-1)
(modified) lldb/include/lldb/ValueObject/DILParser.h (+3)
(modified) lldb/source/ValueObject/DILAST.cpp (+5)
(modified) lldb/source/ValueObject/DILEval.cpp (+110)
(modified) lldb/source/ValueObject/DILLexer.cpp (+43-11)
(modified) lldb/source/ValueObject/DILParser.cpp (+72)
(modified) lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py (+2-2)
(modified) lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py (+1-1)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile (+3)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py (+39)
(added) lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp (+3)
(modified) lldb/unittests/ValueObject/DILLexerTests.cpp (+15-6)
(modified) llvm/include/llvm/ADT/StringRef.h (+2)
(modified) llvm/lib/Support/StringRef.cpp (+3-3)

diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
index 783432dabd6db..da1796d936c6a 100644
--- a/lldb/docs/dil-expr-lang.ebnf
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -31,6 +31,9 @@ identifier = ? C99 Identifier ? ;
 
 integer_literal = ? Integer constant: hexademical, decimal, octal, binary ? ;
 
+numeric_literal = ? Integer constant: hexademical, decimal, octal, binary ?
+                | ? Floating constant ? ;
+
 register = "$" ? Register name ? ;
 
 nested_name_specifier = type_name "::"
diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h
index 709f0639135f1..a0e5909a8c6a7 100644
--- a/lldb/include/lldb/ValueObject/DILAST.h
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -23,6 +23,7 @@ enum class NodeKind {
   eErrorNode,
   eIdentifierNode,
   eMemberOfNode,
+  eScalarLiteralNode,
   eUnaryOpNode,
 };
 
@@ -178,6 +179,40 @@ class BitFieldExtractionNode : public ASTNode {
   int64_t m_last_index;
 };
 
+class ScalarLiteralNode : public ASTNode {
+public:
+  ScalarLiteralNode(uint32_t location, Scalar value, uint32_t radix,
+                    bool is_unsigned, bool is_long, bool is_longlong)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long),
+        m_is_longlong(is_longlong) {}
+
+  ScalarLiteralNode(uint32_t location, Scalar value, bool is_float)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_is_float(is_float) {}
+
+  llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+  Scalar GetValue() const & { return m_value; }
+  uint32_t GetRadix() const { return m_radix; }
+  bool IsUnsigned() const { return m_is_unsigned; }
+  bool IsLong() const { return m_is_long; }
+  bool IsLongLong() const { return m_is_longlong; }
+  bool IsFloat() const { return m_is_float; }
+
+  static bool classof(const ASTNode *node) {
+    return node->GetKind() == NodeKind::eScalarLiteralNode;
+  }
+
+private:
+  Scalar m_value;
+  uint32_t m_radix;
+  bool m_is_unsigned;
+  bool m_is_long;
+  bool m_is_longlong;
+  bool m_is_float;
+};
+
 /// This class contains one Visit method for each specialized type of
 /// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
 /// the correct function in the DIL expression evaluator for evaluating that
@@ -195,6 +230,8 @@ class Visitor {
   Visit(const ArraySubscriptNode *node) = 0;
   virtual llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) = 0;
+  virtual llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) = 0;
 };
 
 } // namespace lldb_private::dil
diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h
index 45e29b3ddcd7b..22a6c5bd0af9a 100644
--- a/lldb/include/lldb/ValueObject/DILEval.h
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -54,6 +54,13 @@ class Interpreter : Visitor {
   Visit(const ArraySubscriptNode *node) override;
   llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) override;
+  llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) override;
+
+  llvm::Expected<CompilerType>
+  PickLiteralType(lldb::TypeSystemSP type_system,
+                  std::shared_ptr<ExecutionContextScope> ctx,
+                  const ScalarLiteralNode *literal);
 
   // Used by the interpreter to create objects, perform casts, etc.
   lldb::TargetSP m_target;
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 9c1ba97680253..a9f01785c6c20 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -28,11 +28,12 @@ class Token {
     arrow,
     coloncolon,
     eof,
+    floating_constant,
     identifier,
+    integer_constant,
     l_paren,
     l_square,
     minus,
-    numeric_constant,
     period,
     r_paren,
     r_square,
diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h
index 9eda7bac4a364..90df109337dcf 100644
--- a/lldb/include/lldb/ValueObject/DILParser.h
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -96,6 +96,9 @@ class DILParser {
   std::string ParseIdExpression();
   std::string ParseUnqualifiedId();
   std::optional<int64_t> ParseIntegerConstant();
+  ASTNodeUP ParseNumericLiteral();
+  ASTNodeUP ParseIntegerLiteral();
+  ASTNodeUP ParseFloatingPointLiteral();
 
   void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
 
diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp
index b1cd824c2299e..38215ae18f6ce 100644
--- a/lldb/source/ValueObject/DILAST.cpp
+++ b/lldb/source/ValueObject/DILAST.cpp
@@ -37,4 +37,9 @@ BitFieldExtractionNode::Accept(Visitor *v) const {
   return v->Visit(this);
 }
 
+llvm::Expected<lldb::ValueObjectSP>
+ScalarLiteralNode::Accept(Visitor *v) const {
+  return v->Visit(this);
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp
index 6f28434c646cd..ee667e9306ca5 100644
--- a/lldb/source/ValueObject/DILEval.cpp
+++ b/lldb/source/ValueObject/DILEval.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/ValueObject/DILEval.h"
+#include "lldb/Core/Module.h"
 #include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/ValueObject/DILAST.h"
@@ -402,4 +404,112 @@ Interpreter::Visit(const BitFieldExtractionNode *node) {
   return child_valobj_sp;
 }
 
+static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr<StackFrame> ctx) {
+  SymbolContext symbol_context =
+      ctx->GetSymbolContext(lldb::eSymbolContextCompUnit);
+  auto language = symbol_context.comp_unit->GetLanguage();
+
+  symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule);
+  auto type_system =
+      symbol_context.module_sp->GetTypeSystemForLanguage(language);
+
+  if (type_system)
+    return *type_system;
+
+  return lldb::TypeSystemSP();
+}
+
+static CompilerType GetBasicType(lldb::TypeSystemSP type_system,
+                                 lldb::BasicType basic_type) {
+  if (type_system)
+    if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type))
+      return compiler_type;
+
+  CompilerType empty_type;
+  return empty_type;
+}
+
+llvm::Expected<CompilerType>
+Interpreter::PickLiteralType(lldb::TypeSystemSP type_system,
+                             std::shared_ptr<ExecutionContextScope> ctx,
+                             const ScalarLiteralNode *literal) {
+  Scalar scalar = literal->GetValue();
+  if (scalar.GetType() == Scalar::e_float) {
+    if (literal->IsFloat())
+      return GetBasicType(type_system, lldb::eBasicTypeFloat);
+    return GetBasicType(type_system, lldb::eBasicTypeDouble);
+  } else if (scalar.GetType() == Scalar::e_int) {
+    // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be
+    // an unsigned integer.
+    bool unsigned_is_allowed =
+        literal->IsUnsigned() || literal->GetRadix() != 10;
+
+    // Try int/unsigned int.
+    uint64_t int_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeInt)
+                        .GetByteSize(ctx.get()))
+      int_byte_size = *temp;
+    unsigned int_size = int_byte_size * CHAR_BIT;
+    llvm::APInt apint = scalar.GetAPSInt();
+    if (!literal->IsLong() && !literal->IsLongLong() &&
+        apint.isIntN(int_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(int_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeInt);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt);
+    }
+    // Try long/unsigned long.
+    uint64_t long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong)
+                        .GetByteSize(ctx.get()))
+      long_byte_size = *temp;
+    unsigned long_size = long_byte_size * CHAR_BIT;
+    if (!literal->IsLongLong() && apint.isIntN(long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLong);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong);
+    }
+    // Try long long/unsigned long long.
+    uint64_t long_long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong)
+                        .GetByteSize(ctx.get()))
+      long_long_byte_size = *temp;
+    unsigned long_long_size = long_long_byte_size * CHAR_BIT;
+    if (apint.isIntN(long_long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLongLong);
+      // If we still couldn't decide a type, we probably have something that
+      // does not fit in a signed long long, but has no U suffix. Also known as:
+      //
+      //  warning: integer literal is too large to be represented in a signed
+      //  integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal]
+      //
+      return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong);
+    }
+    return llvm::make_error<DILDiagnosticError>(
+        m_expr,
+        "integer literal is too large to be represented in any integer type",
+        literal->GetLocation());
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", literal->GetLocation());
+}
+
+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ScalarLiteralNode *node) {
+  auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope);
+  if (type_system) {
+    auto type = PickLiteralType(type_system, m_exe_ctx_scope, node);
+    if (type) {
+      Scalar scalar = node->GetValue();
+      return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type,
+                                                      "result");
+    } else
+      return type.takeError();
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", node->GetLocation());
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index eaefaf484bc18..00f9a0c515461 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -28,16 +28,18 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
     return "coloncolon";
   case Kind::eof:
     return "eof";
+  case Kind::floating_constant:
+    return "floating_constant";
   case Kind::identifier:
     return "identifier";
+  case Kind::integer_constant:
+    return "integer_constant";
   case Kind::l_paren:
     return "l_paren";
   case Kind::l_square:
     return "l_square";
   case Kind::minus:
     return "minus";
-  case Kind::numeric_constant:
-    return "numeric_constant";
   case Kind::period:
     return "period";
   case Kind::r_paren:
@@ -72,12 +74,39 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
 
 static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
 
-static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
-                                               llvm::StringRef &remainder) {
-  if (IsDigit(remainder[0])) {
-    llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
-    remainder = remainder.drop_front(number.size());
-    return number;
+static std::optional<llvm::StringRef> IsNumber(llvm::StringRef &remainder,
+                                               bool &isFloat) {
+  llvm::StringRef::iterator cur_pos = remainder.begin();
+  if (*cur_pos == '.') {
+    auto next_pos = cur_pos + 1;
+    if (next_pos == remainder.end() || !IsDigit(*next_pos))
+      return std::nullopt;
+  }
+  if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {
+    while (IsNumberBodyChar(*cur_pos))
+      cur_pos++;
+
+    if (*cur_pos == '.') {
+      isFloat = true;
+      cur_pos++;
+      while (IsNumberBodyChar(*cur_pos))
+        cur_pos++;
+
+      // Check if there's an exponent
+      char prev_ch = *(cur_pos - 1);
+      if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' ||
+          prev_ch == 'P') {
+        if (*(cur_pos) == '+' || *(cur_pos) == '-') {
+          cur_pos++;
+          while (IsNumberBodyChar(*cur_pos))
+            cur_pos++;
+        }
+      }
+    }
+
+    llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin());
+    if (remainder.consume_front(number))
+      return number;
   }
   return std::nullopt;
 }
@@ -106,9 +135,12 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
     return Token(Token::eof, "", (uint32_t)expr.size());
 
   uint32_t position = cur_pos - expr.begin();
-  std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
-  if (maybe_number)
-    return Token(Token::numeric_constant, maybe_number->str(), position);
+  bool isFloat = false;
+  std::optional<llvm::StringRef> maybe_number = IsNumber(remainder, isFloat);
+  if (maybe_number) {
+    auto kind = isFloat ? Token::floating_constant : Token::integer_constant;
+    return Token(kind, maybe_number->str(), position);
+  }
   std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
   if (maybe_word)
     return Token(Token::identifier, maybe_word->str(), position);
diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp
index eac41fab90763..35eb6d62b7ba4 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -179,10 +179,13 @@ ASTNodeUP DILParser::ParsePostfixExpression() {
 // Parse a primary_expression.
 //
 //  primary_expression:
+//    numeric_literal
 //    id_expression
 //    "(" expression ")"
 //
 ASTNodeUP DILParser::ParsePrimaryExpression() {
+  if (CurToken().IsOneOf({Token::integer_constant, Token::floating_constant}))
+    return ParseNumericLiteral();
   if (CurToken().IsOneOf(
           {Token::coloncolon, Token::identifier, Token::l_paren})) {
     // Save the source location for the diagnostics message.
@@ -346,6 +349,7 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
   m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
 }
 
+// FIXME: Remove this once subscript operator uses ScalarLiteralNode.
 // Parse a integer_literal.
 //
 //  integer_literal:
@@ -370,6 +374,74 @@ std::optional<int64_t> DILParser::ParseIntegerConstant() {
   return std::nullopt;
 }
 
+// Parse a numeric_literal.
+//
+//  numeric_literal:
+//    ? Token::integer_constant ?
+//    ? Token::floating_constant ?
+//
+ASTNodeUP DILParser::ParseNumericLiteral() {
+  ASTNodeUP numeric_constant;
+  if (CurToken().Is(Token::integer_constant))
+    numeric_constant = ParseIntegerLiteral();
+  else
+    numeric_constant = ParseFloatingPointLiteral();
+  if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
+    BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
+                          CurToken()),
+            CurToken().GetLocation(), CurToken().GetSpelling().length());
+    return std::make_unique<ErrorNode>();
+  }
+  m_dil_lexer.Advance();
+  return numeric_constant;
+}
+
+ASTNodeUP DILParser::ParseIntegerLiteral() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+
+  auto radix = llvm::getAutoSenseRadix(spelling_ref);
+  bool is_unsigned = false, is_long = false, is_longlong = false;
+  if (spelling_ref.consume_back_insensitive("ll"))
+    is_longlong = true;
+  if (spelling_ref.consume_back_insensitive("l"))
+    is_long = true;
+  if (spelling_ref.consume_back_insensitive("u"))
+    is_unsigned = true;
+
+  llvm::APInt raw_value;
+  if (!spelling_ref.getAsInteger(radix, raw_value)) {
+    Scalar scalar_value(raw_value);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, radix, is_unsigned,
+                                               is_long, is_longlong);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
+ASTNodeUP DILParser::ParseFloatingPointLiteral() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+
+  bool is_float = false;
+  llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
+  if (spelling_ref.consume_back_insensitive("f")) {
+    is_float = true;
+    raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
+  }
+
+  auto StatusOrErr = raw_float.convertFromString(
+      spelling_ref, llvm::APFloat::rmNearestTiesToEven);
+  if (!errorToBool(StatusOrErr.takeError())) {
+    Scalar scalar_value(raw_float);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, is_float);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
 void DILParser::Expect(Token::Kind kind) {
   if (CurToken().IsNot(kind)) {
     BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
index 0f56057189395..1d0340160f5e4 100644
--- a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
+++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
@@ -66,7 +66,7 @@ def test_subscript(self):
         self.expect(
             "frame var 'int_arr[1.0]'",
             error=True,
-            substrs=["expected 'r_square', got: <'.'"],
+            substrs=["failed to parse integer constant: <'1.0' (floating_constant)>"],
         )
 
         # Base should be a "pointer to T" and index should be of an integral type.
@@ -88,7 +88,7 @@ def test_subscript(self):
         self.expect(
             "frame var '1[2]'",
             error=True,
-            substrs=["Unexpected token"],
+            substrs=["subscripted value is not an array or pointer"],
         )
 
         # Base should not be a pointer to void
diff --git a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
index 38c72131d797c..28eba4f1a70bc 100644
--- a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
+++ b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
@@ -35,7 +35,7 @@ def test_frame_var(self):
         self.expect(
             "frame variable '*1'",
             error=True,
-            substrs=["Unexpected token: <'1' (numeric_constant)>"],
+            substrs=["dereference failed: not a pointer, reference or array type"],
         )
         self.expect(
             "frame variable '*val'",
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
new file mode 100644
index 0000000000000..57a636ebb0829
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
@@ -0,0 +1,39 @@
+"""
+Test DIL arithmetic.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test import lldbutil
+
+
+class TestFrameVarDILArithmetic(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def test_arithmetic(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp")
+        )
+
+        self.runCmd("settings set target.experimental.use-DIL true")
+
+        # Check number parsing
+        self.expect_var_path("1.0", value="1", type="double")
+        self.expect_var_path("1.0f", value="1", type="float")
+...
[truncated]

kuilpd · 2025-08-06T13:51:54Z

PickLiteralType follows C++ logic, but this can later be put in a plugin if the logic doesn't apply everywhere. I'm not sure how to test this though, since the type is picked by width, so the resulting type name can be different on different machines.

I also split lexing number into 2 tokens to avoid parsing a single number token twice just to determine which number that is. I had to make getAutoSenseRadix in StringRef global to avoid copying that code into DIL, hopefully it's OK for it to be part of this patch and not a separate PR.

kuhar

Can you move the ADT changes to a separate PR?

Needed in #152308

labath

I have a feeling that this would actually be better off as two node types (float and integer), as each stage (lexer, parser and evaluator) handles the two differently. Maybe then the integer node could use llvm::APInt directly, which would make the is_unsigned field look less out of place.

The implementation makes sense to me, but I don't know if we've reached consensus that this is the way this should work.

labath · 2025-08-08T10:24:28Z

lldb/include/lldb/ValueObject/DILAST.h

What's up with the &? I haven't used these before. Is there any benefit to that?

Um, it's probably an accident. I think it means that it can only be called on an lvalues, but there's no point for it here.

labath · 2025-08-08T10:26:32Z

lldb/source/ValueObject/DILEval.cpp

I think there's more auto here than we usually have in llvm: https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable

labath · 2025-08-08T10:28:10Z

lldb/source/ValueObject/DILEval.cpp

Is this different than return symbol_context.module_sp->GetTypeSystemForLanguage(language) ? without the type of auto, I can't see...

labath · 2025-08-08T10:32:02Z

lldb/source/ValueObject/DILParser.cpp

c++ allows both llu and ull, so this will need to be smarter if you want to accept both.

labath · 2025-08-08T10:36:55Z

lldb/source/ValueObject/DILLexer.cpp

Suggested change

if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {

if (IsDigit(*cur_pos) || *cur_pos == '.') {

labath · 2025-08-08T10:40:17Z

lldb/source/ValueObject/DILLexer.cpp

How does anything in this function deal with running off the end of the String(Ref)?

(This is kind of why I prefer using the StringRef methods (consume_front, drop_while, etc.) instead of working with iterators/indexes directly.)

I guess data in StringRef is not guaranteed to be null-terminated, I added a check for it.
I tried implementing it with StringRef functions, but I need to consume parts multiple times, or even only one character, and every time it produces a separate StringRef; it was kind of messy.

Yeah, it doesn't have to be. However, the check you've added here is hardly sufficient. Even simply incrementing the end iterator is UB so a comparison like cur_pos > remainder.end() doesn't make sense. A sufficiently smart compiler might even optimize that away. You pretty much need check for EOF every time you increment the iterator.

StringRef functions can be unwieldy if you try to implement them like you would do it with iterators/indexes. However, there is often a better (and shorter) way to express that with the high-level methods. So, something like "sequence of numbers, maybe followed by a dot, followed by another sequence of numbers, where one of the sequences is not empty" might be better expressed as "sequence of numbers and dots, which contains at most one dot and at least one number", which you can write as:

StringRef number = input.take_while([](char c){ return IsNumberBodyChar(c) || c == '.'; } ); size_t dots = number.count('.'); if (dots > 1|| dots == number.size()) return bad_number;

It can't be this simple, there are check for certain characters at certain positions, like a number must follow a '.' if that's the first character, or - and + are considered part of the number only following an exponent character. I redid without using iterators anyway, but it's not as compact.

labath · 2025-08-08T10:45:50Z

lldb/source/ValueObject/DILEval.cpp

Invert the condition and make this an early exit: https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code

labath · 2025-08-08T10:47:13Z

lldb/source/ValueObject/DILEval.cpp

Can this use Scalar::TruncOrExtendTo?

Yes, I didn't notice this function.

labath · 2025-08-08T10:48:01Z

lldb/source/ValueObject/DILEval.cpp

No else after return: https://llvm.org/docs/CodingStandards.html#don-t-use-else-after-a-return

kuilpd · 2025-08-08T15:05:57Z

I have a feeling that this would actually be better off as two node types (float and integer), as each stage (lexer, parser and evaluator) handles the two differently. Maybe then the integer node could use llvm::APInt directly, which would make the is_unsigned field look less out of place.

The implementation makes sense to me, but I don't know if we've reached consensus that this is the way this should work.

The node will also have boolean literals and nullptr later, they are all stored in a Scalar, so I don't think we should do 4 nodes for this. I can add them in this PR as well.

We didn't really discuss how literals are parsed and stored, only the math and type promotion part, which I will get to in the next PR.

lldb/include/lldb/ValueObject/DILAST.h

cmtice · 2025-08-08T22:41:33Z

lldb/include/lldb/ValueObject/DILAST.h

This leaves m_is_unsigned, m_radix, m_is_long and m_is_longlong uninitialized. m_radix being uninitialized might be a problem.

labath

The node will also have boolean literals and nullptr later, they are all stored in a Scalar, so I don't think we should do 4 nodes for this.

Why not? If all you needed was a single Scalar field that is handled uniformly, then sure. However, here you have a lot of additional data that only makes sense for a particular kind of literals. And if every use of these nodes is going to be followed by a switch on the literal type, then I don't think it saves much over declaring a separate class for each one. And it avoids questions like "what does is_floating_point() mean for a nullptr?" ...

I can add them in this PR as well.

Let's not do that as that brings up the discussion about what to do with keywords.

We didn't really discuss how literals are parsed and stored,

That's fine, that can be handled on the PR itself (i.e., here).

only the math and type promotion part, which I will get to in the next PR.

That's the part I'm not sure about the consensus for, so I am going to ask someone else to review this as well.

lldb/source/ValueObject/DILParser.cpp

lldb/include/lldb/ValueObject/DILAST.h

labath · 2025-08-11T15:54:06Z

lldb/source/ValueObject/DILLexer.cpp

Yeah, it doesn't have to be. However, the check you've added here is hardly sufficient. Even simply incrementing the end iterator is UB so a comparison like cur_pos > remainder.end() doesn't make sense. A sufficiently smart compiler might even optimize that away. You pretty much need check for EOF every time you increment the iterator.

StringRef functions can be unwieldy if you try to implement them like you would do it with iterators/indexes. However, there is often a better (and shorter) way to express that with the high-level methods. So, something like "sequence of numbers, maybe followed by a dot, followed by another sequence of numbers, where one of the sequences is not empty" might be better expressed as "sequence of numbers and dots, which contains at most one dot and at least one number", which you can write as:

StringRef number = input.take_while([](char c){ return IsNumberBodyChar(c) || c == '.'; } ); size_t dots = number.count('.'); if (dots > 1|| dots == number.size()) return bad_number;

Michael137 · 2025-08-12T10:16:55Z

lldb/source/ValueObject/DILEval.cpp

Might've missed the discussion on this but why can't this live in the TypeSystem plugin? Are these heuristics in here something you came up with or did we copy it from the C++ plugin?

The code for integers is mostly from lldb-eval, but it's not the same code as in clang implementation or anywhere else that I could find. It follows the C99 standard, except for not outputting a warning with unsigned long long like clang does. We could put in in TypeSystem, but I'm still not sure how to test this, since the resulting type can be different on different machines.

I think the idea was to implement the code in a type system, not reuse an existing implementation from there. And I guess it could be implemented there, but there's no reason to do that (in a world where we the DIL defines its own integer parsing/promotion rules, and the implementation only "happens" to match C(++)).

As for testing, this is tested in python, right? Couldn't you write some code to find the bit width of a type in the current target, and then generate the test cases (INT_MAX+/-1 or whatever) for that? Since you don't really need much from a Target to test integer promotion, it's also possible to set up a test using a Target not matching the current host. The biggest problem is needing a frame object. If we had "target variable" support the dil syntax (which it currently does not, but I believe it should, as it's weird that the two commands have different implementations), then it would be fairly easy as target variable works without a Process (or a Frame) object.

Cat try that, except still not sure how to check if the type is signed/unsigned, SBType doesn't have such an intefrace.
Can I just compare the result to that of full EvaluateExpression? Or would it be too slow for testing?

Can you call GetBasicType and check that the result is (one of the) unsigned enum(s)?

I would say the expression evaluator would be too slow, but I'd like to avoid to prevent someone from getting an idea that we're matching c++ semantics.

I tried this, please take a look. I also realized we could just put a bunch of literals with auto type in main.cpp and compare DIL results to them. Faster, can try more cases, but if we don't want it to look like we have the same semantics as C++ (which we totally do in this case), we can keep the python code I just wrote.

That could be fine too, but I think that what you have here is pretty good as well. I don't see a reason to change it.

labath · 2025-08-14T12:25:39Z

lldb/include/lldb/ValueObject/DILAST.h

Suggested change

: ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),

: ASTNode(location, NodeKind::eScalarLiteralNode), m_value(std::move(value)),

labath · 2025-08-14T12:25:56Z

lldb/include/lldb/ValueObject/DILAST.h

Suggested change

llvm::APInt GetValue() const { return m_value; }

const llvm::APInt& GetValue() const { return m_value; }

labath · 2025-08-14T12:27:10Z

lldb/include/lldb/ValueObject/DILAST.h

The two are mutually exclusive, so maybe make an enum out of that?

lldb/include/lldb/ValueObject/DILAST.h

labath · 2025-08-14T12:31:08Z

lldb/include/lldb/ValueObject/DILAST.h

Isn't this kind of encoded in the "semantics" property of the APFloat value?

labath · 2025-08-14T12:55:41Z

lldb/source/ValueObject/DILLexer.cpp

Suggested change

if (!tail.empty() && (tail.front() == '+' || tail.front() == '-')) {

tail = tail.drop_front();

tail = tail.drop_while(IsNumberBodyChar);

}

if (tail.consume_front("+") || tail.consume_front("-"))

tail = tail.drop_while(IsNumberBodyChar);

labath · 2025-08-14T12:59:56Z

lldb/source/ValueObject/DILParser.cpp

You're not using anything from the node object in the error case. Could failure be signalled by a nullptr. Or should this maybe forward the error node?

labath · 2025-08-14T13:00:45Z

lldb/source/ValueObject/DILParser.cpp

labath · 2025-08-14T13:03:46Z

lldb/test/API/commands/frame/var-dil/expr/Literals/TestFrameVarDILLiterals.py

if int and long are the same size, then long and long long likely aren't, so you could perform the same check with those two.

labath · 2025-08-14T13:05:35Z

lldb/source/ValueObject/DILEval.cpp

This would definitely look better with an enum

labath · 2025-08-14T13:07:28Z

There's still too much auto in the patch. Particularly those that hide llvm::Expected are dangerous. Apart from that, and despite the number of comments, I think this is pretty good.

Michael137 · 2025-08-14T13:25:46Z

lldb/include/lldb/ValueObject/DILEval.h

Should we leave a comment saying that this is not copied from any particular integer promotion rules, and specific to the DIL DSL?

It's not integer promotion, just type inference from a literal's value and suffixes. Technically, right now it's exactly C99 rules, so I can add a comment that as of now this follows C99 rules.

cmtice · 2025-08-14T18:56:12Z

lldb/docs/dil-expr-lang.ebnf

Why not use 'integer_literal' here, rather than repeating the integer literal definition?

Ah, it's just a leftover from the subscript node that still directly parses a signed integer, it'll be removed later

cmtice · 2025-08-14T19:01:54Z

lldb/include/lldb/ValueObject/DILLexer.h

Nit: I prefer 'float_constant' to 'floating_constant'. But I don't feel strongly about this.

labath · 2025-08-15T08:44:45Z

lldb/unittests/ValueObject/DILLexerTests.cpp

What about 0xe+whatever

I added this case, and then found out that actual C++ gives an error on numbers like 0xe+1, saying that's a wrong suffix. Our lexer will now treat it as 2 separate numbers, which seems to be more reasonable, or is there some case again with this format that I'm missing?

Yes, that was a surprise to me as well. It seems that msvc accepts this (recognizes it as addition of two integers) but gcc and clang do reject it. I asked around, and it seems that gcc&clang are correct here (the standard even explicitly states that 0xe+foo is a single token).

Given that, I'm not going to insist on this interpretation. I think what you have is fine (we're a different language after all, and I believe C++'s woes come mainly from the preprocessor, which I hope we won't have), but I'd be also fine with reverting those changes for the sake of (bug) compatibility with c++, or in order to simplify things.

labath · 2025-08-15T08:50:20Z

lldb/source/ValueObject/DILLexer.cpp

Right, I forgot about 1e+1. However, I still don't see how this would prevent 0xe+1 from being parsed as a number (when it should be parsed as an addition of two integers).

labath · 2025-08-15T09:01:06Z

lldb/source/ValueObject/DILEval.cpp

Does this handle INT_MIN correctly? -128 is fits into a 8 bit signed integer, but you need 8 bits to represent "128". Maybe you need isSignedIntN ?

Only positive numbers from bare literals are handled by this function, a minus will make it a unary node and negate the number afterwards.

labath · 2025-08-15T09:12:04Z

lldb/source/ValueObject/DILEval.cpp

I think this boils down to

if (type_system) return type_system->GetBasicTypeFromAST(basic_type); return {};

At which point, the only purpose of this function is to check for nullness of the type system. But I don't think you need that as you could check for its validity in the caller early on. After all, without a type system, you're not going to be able to create /any/ constant.

I shortened this, but let's leave the type system check, because in case someone calls this with an empty type system, I'd like it not to crash

labath · 2025-08-15T09:38:46Z

lldb/source/ValueObject/DILEval.cpp

I'm afraid I don't see the difference, and I've spent a lot of time looking at this function.
Here's my attempt at an equivalent version. I think it should be equivalent, but if it's not, then maybe that means we should make the differences more prominent.

SmallVector<std::pair<BasicType, BasicType>, 3> candidates; if (literal->GetTypeSuffix() <= IntegerTypeSuffix::None) candidates.emplace_back(eBasicTypeInt, unsigned_is_allowed ? eBasicTypeUInt : eBasicTypeInvalid); if (literal->GetTypeSuffix() <= IntegerTypeSuffix::Long) candidates.emplace_back(eBasicTypeLong, unsigned_is_allowed ? eBasicTypeULong : eBasicTypeInvalid); candidates.emplace_back(eBasicTypeLongLong, eBasicTypeULLong); for (auto [signed_, unsigned_] : candidates) { CompilerType signed_type = type_system->GetBasicTypeFromAST(signed_); if (!signed_type) continue; llvm::Expected<uint64_t> size = signed_type.GetBitSize(ctx.get()); if (!size) return size.takeError(); if (!literal.IsUnsigned() && apint.isIntN(*size-1)) return signed_type; if (unsigned_ != eBasicTypeInvalid && apint.isIntN(*size)) return type_system->GetBasicTypeFromAST(unsigned_) } return llvm::make_error(...);

A small difference is that this continues if it can't find (e.g.) the int type. I think this is fine -- if somehow we do end up with a type system that does not have an int type, then it makes sense to try the next larger one.

kuilpd · 2025-08-19T17:53:20Z

@labath
I have the next PR standing by, is this one ready for merging?

labath

Sorry about the delay. I was out last week.

labath · 2025-08-26T10:52:09Z

lldb/unittests/ValueObject/DILLexerTests.cpp

Yes, that was a surprise to me as well. It seems that msvc accepts this (recognizes it as addition of two integers) but gcc and clang do reject it. I asked around, and it seems that gcc&clang are correct here (the standard even explicitly states that 0xe+foo is a single token).

Given that, I'm not going to insist on this interpretation. I think what you have is fine (we're a different language after all, and I believe C++'s woes come mainly from the preprocessor, which I hope we won't have), but I'd be also fine with reverting those changes for the sake of (bug) compatibility with c++, or in order to simplify things.

…arsing

…52308)" This reverts commit 51230d9.

…155605) Reverts #152308

…155610) This patch introduces `ScalarLiteralNode` without any uses by other nodes yet. It also includes lexing and parsing for integer and floating point numbers. Reapplies #152308 with a fix.

… in DIL" (#155605) Reverts llvm/llvm-project#152308

kuilpd requested review from labath, cmtice and jimingham August 6, 2025 13:51

kuilpd requested a review from JDevlieghere as a code owner August 6, 2025 13:51

kuilpd added the lldb label Aug 6, 2025

llvmbot added llvm:support llvm:adt labels Aug 6, 2025

kuhar reviewed Aug 6, 2025

View reviewed changes

kuilpd mentioned this pull request Aug 7, 2025

[ADT] Make getAutoSenseRadix in StringRef global #152503

Merged

kuilpd added a commit that referenced this pull request Aug 7, 2025

[ADT] Make getAutoSenseRadix in StringRef global (#152503)

2ff44d7

Needed in #152308

kuilpd force-pushed the DIL-upstream-add-scalar branch from 27466c3 to 3b9d987 Compare August 7, 2025 18:26

kuhar requested review from kuhar and removed request for kuhar August 7, 2025 18:40

labath reviewed Aug 8, 2025

View reviewed changes

cmtice reviewed Aug 8, 2025

View reviewed changes

lldb/include/lldb/ValueObject/DILAST.h Outdated Show resolved Hide resolved

cmtice reviewed Aug 8, 2025

View reviewed changes

labath reviewed Aug 11, 2025

View reviewed changes

Michael137 reviewed Aug 12, 2025

View reviewed changes

labath reviewed Aug 14, 2025

View reviewed changes

Michael137 reviewed Aug 14, 2025

View reviewed changes

cmtice requested a review from werat August 14, 2025 15:03

cmtice reviewed Aug 14, 2025

View reviewed changes

labath reviewed Aug 15, 2025

View reviewed changes

kuilpd requested a review from labath August 25, 2025 10:25

labath approved these changes Aug 26, 2025

View reviewed changes

kuilpd added 17 commits August 27, 2025 14:34

Add ScalarLiteralNode

6c8013a

Lex and parse floating numbers; split number tokens to avoid double p…

b355c88

…arsing

Automatically pick integer literal type

1320777

Adjust APInt bitwidth

902f72a

Adjust code formatting

bfed95b

Fix suffix parsing

6989af6

Add remainder.end() guard

967549e

Redo IsNumber without iterators

5eb7dc2

Split ScalarLiteralNode

45498a4

Add tests for PickIntegerType

199dd80

Parser and Eval code cleanup

f5fffc8

Expand float lexing

a172686

Add long<longlong test

1882999

Add '0xe+1' case to lexing

6a06470

Code cleanup

cd9dfc6

Refactor PickIntegerType

8d44b9d

Fix a test after rebasing

0a7aa2b

kuilpd force-pushed the DIL-upstream-add-scalar branch from 14b8ea4 to 0a7aa2b Compare August 27, 2025 10:22

kuilpd merged commit 51230d9 into llvm:main Aug 27, 2025
10 checks passed

kuilpd added a commit that referenced this pull request Aug 27, 2025

Revert "[LLDB] Add ScalarLiteralNode and literal parsing in DIL (#1…

4a1613b

…52308)" This reverts commit 51230d9.

kuilpd mentioned this pull request Aug 27, 2025

Revert "[LLDB] Add ScalarLiteralNode and literal parsing in DIL" #155605

Merged

kuilpd added a commit that referenced this pull request Aug 27, 2025

Revert "[LLDB] Add ScalarLiteralNode and literal parsing in DIL" (#…

241d160

…155605) Reverts #152308

kuilpd mentioned this pull request Aug 27, 2025

Reapply "[LLDB] Add ScalarLiteralNode and literal parsing in DIL" #155610

Merged

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 27, 2025

Automerge: Revert "[LLDB] Add ScalarLiteralNode and literal parsing…

c3ed26f

… in DIL" (#155605) Reverts llvm/llvm-project#152308

	if (IsDigit((cur_pos)) \|\| (cur_pos) == '.') {
	if (IsDigit(cur_pos) \|\| cur_pos == '.') {

	: ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
	: ASTNode(location, NodeKind::eScalarLiteralNode), m_value(std::move(value)),

	llvm::APInt GetValue() const { return m_value; }
	const llvm::APInt& GetValue() const { return m_value; }

-      if (!tail.empty() && (tail.front() == '+' || tail.front() == '-')) {
-        tail = tail.drop_front();
-        tail = tail.drop_while(IsNumberBodyChar);
-      }
+      if (tail.consume_front("+") || tail.consume_front("-"))
+        tail = tail.drop_while(IsNumberBodyChar);

[LLDB] Add ScalarLiteralNode and literal parsing in DIL #152308

[LLDB] Add ScalarLiteralNode and literal parsing in DIL #152308

Uh oh!

Conversation

kuilpd commented Aug 6, 2025

Uh oh!

llvmbot commented Aug 6, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

kuilpd commented Aug 6, 2025

Uh oh!

kuhar left a comment

Choose a reason for hiding this comment

Uh oh!

labath left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

kuilpd commented Aug 8, 2025

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

labath left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

kuilpd Aug 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

[LLDB] Add `ScalarLiteralNode` and literal parsing in DIL #152308

llvmbot commented Aug 6, 2025 •

edited

Loading

kuilpd Aug 13, 2025 •

edited

Loading